#!/usr/bin/env python
# encoding: utf-8
"""
#-------------------------------------------------------------------#
#                   CONFIDENTIAL --- CUSTOM STUDIOS                 #     
#-------------------------------------------------------------------#
#                                                                   #
#                   @Project Name : Globallawonline                #
#                                                                   #
#                   @File Name    : highfreword.py                      #
#                                                                   #
#                   @Programmer   : 李建                            #
#                                                                   #  
#                   @Start Date   : 2021/4/29 14:07                 #
#                                                                   #
#                   @Last Update  : 2021/4/29 14:07                 #
#                                                                   #
#-------------------------------------------------------------------#
# Classes: count high-frequency words in data awaiting proofreading #
#                                                                   #
#-------------------------------------------------------------------#
"""
import pymysql
import re


def addindict(wordlist: list, worddict: dict):
    """Add one occurrence of every word in ``wordlist`` to ``worddict``.

    Args:
        wordlist: Words to count; repeated words are counted once per
            occurrence.
        worddict: Mapping of word -> count. It is updated in place.

    Returns:
        The same ``worddict`` object, after the update.
    """
    for word in wordlist:
        # Start from 0 for unseen words; plain indexing raised KeyError on
        # the first word that was not already a key.
        worddict[word] = worddict.get(word, 0) + 1
    return worddict

# Connection settings for the MySQL database this script reads from.
# NOTE(review): the host and root credentials are hard-coded in source;
# consider loading them from the environment or an untracked config file.
JGMYSQL_CONNEXT = {
    "MYSQL_HOST": "114.116.40.73",
    "MYSQL_PORT": 3306,
    "MYSQL_DB": "southwestpoliticsun",
    "MYSQL_USER": "root",
    "MYSQL_PASSWD": "xinanzhengfadaqxueapp02!",
    "MYSQL_CHARSET": "utf8",
}

# Open the connection as soon as the module runs, then create the cursor
# that the rest of the script uses.
jgconn = pymysql.connect(
    charset=JGMYSQL_CONNEXT["MYSQL_CHARSET"],
    passwd=JGMYSQL_CONNEXT["MYSQL_PASSWD"],
    user=JGMYSQL_CONNEXT["MYSQL_USER"],
    db=JGMYSQL_CONNEXT["MYSQL_DB"],
    port=JGMYSQL_CONNEXT["MYSQL_PORT"],
    host=JGMYSQL_CONNEXT["MYSQL_HOST"],
)
jgcuror = jgconn.cursor()
# Select the LegalName and Organizaation columns of `law` rows whose SortB is
# "LANGUAGEGMY" and whose SYS_FLD_MARKSTATE is "2", "3" or "4".
sql = '''SELECT LegalName,Organizaation FROM `law` WHERE SortB = "LANGUAGEGMY" AND (SYS_FLD_MARKSTATE = "2" or SYS_FLD_MARKSTATE = "3" or SYS_FLD_MARKSTATE = "4")'''

# Punctuation removed from every field before it is split into words.
_PUNCTUATION_TABLE = str.maketrans("", "", "(),“”;:")
# Any ASCII digit marks the token as a date/number to be skipped.
_DIGIT_RE = re.compile(r"[0-9]")
# Original author's note: for Khmer and Vietnamese drop words shorter than 2
# characters; for Thai drop words shorter than 5 characters.
_MIN_WORD_LEN = 2
# Number of most frequent words written to the output file.
_TOP_N = 101
_OUTPUT_PATH = "gmygpc.txt"

try:
    try:
        jgcuror.execute(sql)
        trs = jgcuror.fetchall()
    except pymysql.MySQLError as e:
        # Report the database error and continue with no rows, so the rest
        # of the script does not fail on an undefined `trs`.
        print(str(e))
        trs = ()

    word_dict = {}  # word -> number of occurrences
    for tr in trs:
        for field in tr:
            if not field:
                # NULL or empty column value: nothing to count.
                continue
            for token in field.translate(_PUNCTUATION_TABLE).split(" "):
                if len(token) < _MIN_WORD_LEN:
                    continue
                if _DIGIT_RE.search(token):
                    # Original author's note: excludes Vietnamese dates.
                    continue
                word_dict[token] = word_dict.get(token, 0) + 1

    # Order the (word, count) pairs by count, most frequent first.
    list1 = list(word_dict.items())
    list1.sort(key=lambda x: x[1], reverse=True)

    # Slicing tolerates fewer than _TOP_N distinct words, where indexing
    # list1[i] raised IndexError.
    top_words = list1[:_TOP_N]
    if top_words:
        # Open the file once (still in append mode) instead of once per line.
        with open(_OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.writelines(
                world + ": " + str(number) + "\n" for world, number in top_words
            )
finally:
    # Release the cursor and connection even if processing fails.
    jgcuror.close()
    jgconn.close()
